# 1.1 Table 1 ----
# NOTE: the former `rm(list = ls())` call was removed on purpose. It deleted
# every object in the caller's workspace without actually resetting the
# session (attached packages and options survive), so it gave no real
# isolation. Run the script in a fresh R session instead.

# Load necessary libraries
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(openxlsx)
library(extrafont)
library(reshape2)
library(car)
library(cowplot)

# Register fonts with the Windows graphics device. The "win" device is only
# available on Windows, so the call is skipped on other platforms instead of
# stopping the whole script.
if (.Platform$OS.type == "windows") {
  loadfonts(device = "win")
}

# Names of all worksheets in the workbook; the per-sheet loops below iterate
# over this vector.
sheet_names <- excel_sheets("Data.xlsx")
# Display the sheet names
# print(sheet_names)
# Import every worksheet of an Excel workbook into the global environment.
#
# file_path: path to the .xlsx workbook.
#
# For each worksheet a data frame is read (first row used as column names),
# unlabeled rows in the `Con` column are numbered "rep1", "rep2", ..., and the
# result is stored in .GlobalEnv under the worksheet's own name.
read_and_assign_sheets <- function(file_path) {
  for (sheet_label in excel_sheets(file_path)) {
    sheet_df <- read.xlsx(file_path, sheet = sheet_label, colNames = TRUE)

    # Rows without a label are the replicate measurements; give them names
    unlabeled <- is.na(sheet_df$Con)
    sheet_df$Con[unlabeled] <- paste0("rep", seq_len(sum(unlabeled)))

    # Publish the sheet as a global variable named after the worksheet
    assign(sheet_label, sheet_df, envir = .GlobalEnv)
  }
}

# Load all sheets of the project workbook
file_path <- "Data.xlsx"
read_and_assign_sheets(file_path)
#' Check the ANOVA assumptions for one sheet of replicate measurements
#'
#' Runs a Shapiro-Wilk normality test on every measurement column and Levene's
#' test for homogeneity of variance across those columns.
#'
#' @param data Data frame with a `Con` label column; every other column is
#'   treated as one concentration level. Rows whose `Con` is "Average" or
#'   "STDEV" are ignored.
#' @return A list with
#'   * `normality_p_values`: named numeric vector with one Shapiro-Wilk p-value
#'     per measurement column (`NA` when a column has fewer than three usable
#'     values);
#'   * `levene_test`: the object returned by `leveneTest()`.
perform_tests <- function(data) {
  # Keep only the replicate rows; the summary rows are not observations
  replicate_rows <- data[!data$Con %in% c("Average", "STDEV"), , drop = FALSE]
  measure_cols <- names(replicate_rows)[names(replicate_rows) != "Con"]

  # Coerce every measurement column to numeric. Previously columns that were
  # imported as text (e.g. because of a single non-numeric cell) were silently
  # dropped from the normality test. Cells that cannot be parsed become NA and
  # as.numeric() warns about them.
  to_numeric <- function(x) {
    if (is.numeric(x)) x else as.numeric(as.character(x))
  }
  replicate_rows[measure_cols] <- lapply(replicate_rows[measure_cols], to_numeric)

  # Shapiro-Wilk test per column (shapiro.test() ignores NA itself but needs at
  # least three non-missing values)
  normality_results <- vapply(
    replicate_rows[measure_cols],
    function(x) {
      if (sum(!is.na(x)) < 3) {
        return(NA_real_)
      }
      shapiro.test(x)$p.value
    },
    numeric(1)
  )

  # Long format for Levene's test: one row per (replicate, concentration).
  # Factor levels follow the column order of the sheet.
  long_data <- data.frame(
    Con = rep(replicate_rows$Con, times = length(measure_cols)),
    Concentration = factor(
      rep(measure_cols, each = nrow(replicate_rows)),
      levels = unique(measure_cols)
    ),
    Value = unlist(replicate_rows[measure_cols], use.names = FALSE)
  )

  # Drop missing values AFTER the numeric conversion so that NAs created by the
  # coercion are removed as well
  long_data <- long_data[!is.na(long_data$Value), , drop = FALSE]
  long_data$Concentration <- droplevels(long_data$Concentration)

  # Levene's test for equality of variance between concentration levels
  levene_test <- leveneTest(Value ~ Concentration, data = long_data)

  list(
    normality_p_values = normality_results,
    levene_test = levene_test
  )
}
# Run the assumption checks for every worksheet and print the results.
# Each sheet's data frame is fetched by name with get(); perform_tests()
# returns the Shapiro-Wilk p-values and the Levene test printed below.
# The sheet names are read again from the workbook here.
sheet_names <- excel_sheets("Data.xlsx")
for (sheet in sheet_names) {
test_result <- perform_tests(get(sheet))
print(paste("Results for", sheet))
print(test_result)
}
## [1] "Results for AIa"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.5633650 0.8521294 0.9569349 0.6875491 0.7476451 0.9609992 0.4311201
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 1.1244 0.3738
## 28
##
## [1] "Results for Arg"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.5601209 0.5497490 0.4985203 0.4846414 0.8414910 0.9061812 0.7623688
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 0.4331 0.8505
## 28
##
## [1] "Results for Asp"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.56092773 0.61274594 0.01063096 0.26294865 0.66713049 0.97872100 0.98559315
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 3.47 0.01086 *
## 28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Aspg"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.1895405 0.4612731 0.7480872 0.6306162 0.3914674 0.9997378 0.5579808
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 0.3975 0.8743
## 28
##
## [1] "Results for Cys"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.4176463 0.5439844 0.3189247 0.7849049 0.9344402 0.6357983 0.7654510
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 1.1217 0.3753
## 28
##
## [1] "Results for Gln"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.7945513 0.2110115 0.5767714 0.4026715 0.9804080 0.9580997 0.6406464
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 2.9551 0.02311 *
## 28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Glu"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.05905854 0.97397379 0.95039490 0.72586601 0.60842902 0.67993881 0.41082688
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 1.7057 0.1566
## 28
##
## [1] "Results for Gly"
## $normality_p_values
## 1000 500 100 50 10 5
## 0.64977337 0.82996078 0.72502949 0.06877027 0.49841512 0.28417714
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 1.0254 0.4303
## 27
##
## [1] "Results for His"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.4015037 0.9562376 0.1896328 0.8976194 0.8852202 0.8465223 0.6661249
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 2.9201 0.02436 *
## 28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Iso-Leu"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.24824949 0.83193675 0.15731281 0.98381524 0.36937051 0.08378289 0.25961350
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 2.1131 0.08341 .
## 28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Leu"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.7246126 0.8555970 0.7771820 0.8814065 0.4985817 0.2186498 0.2491243
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 2.4986 0.04608 *
## 28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Lys"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.5958671 0.4273715 0.3453429 0.4096900 0.6429562 0.9684607 0.5856278
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 0.7478 0.6162
## 28
##
## [1] "Results for Met"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.9837256 0.9342727 0.7228427 0.7532409 0.2495740 0.7592478 0.6621991
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 0.4206 0.859
## 28
##
## [1] "Results for Phe"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.4969510 0.6873629 0.5100695 0.8256046 0.3468916 0.8609741 0.4446205
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 2.3299 0.05969 .
## 28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Pro"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.5314929 0.9404085 0.9792745 0.3100920 0.9206538 0.9611291 0.3715548
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 0.9772 0.4589
## 28
##
## [1] "Results for Ser"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.9517442 0.8950489 0.9919508 0.6675814 0.7835833 0.7966734 0.7496777
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 0.2053 0.9722
## 28
##
## [1] "Results for Thr"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.4245250 0.6199374 0.8493000 0.8251067 0.9814672 0.3021418 0.6025638
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 1.4121 0.245
## 28
##
## [1] "Results for Trp"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.29630735 0.89266526 0.94870419 0.96297362 0.73143774 0.88658559 0.07746806
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 0.2206 0.9669
## 28
##
## [1] "Results for Tyr"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.7789145 0.8180186 0.4158107 0.2857817 0.7472215 0.8260023 0.9818407
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 1.6522 0.17
## 28
##
## [1] "Results for Val"
## $normality_p_values
## 1000 500 100 50 10 5 1
## 0.4859096 0.6683519 0.7178185 0.9972318 0.5979738 0.8478458 0.3870650
##
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 1.6111 0.1811
## 28
# 1.2 Table 2 ----
#' One-way ANOVA of the measured value against concentration for one sheet
#'
#' Sheets listed in `welch_sheets` are analysed with Welch's ANOVA
#' (`oneway.test(var.equal = FALSE)`); all other sheets use a classical
#' one-way ANOVA (`aov()`).
#'
#' @param sheet_name Name of the sheet; decides which test is used.
#' @param data Data frame with a `Con` label column; every other column is
#'   treated as one concentration level. Rows whose `Con` is "Average" or
#'   "STDEV" are ignored.
#' @param welch_sheets Character vector of sheet names that require Welch's
#'   ANOVA. The default is the list that was previously hard-coded.
#' @return For a Welch sheet, a list with `sheet`, `test_type`, `F_statistic`,
#'   `p_value`, `Sum_Sq_Between`, `Mean_Sq_Between`, `Sum_Sq_Within`,
#'   `Mean_Sq_Within`, `df_between` and `df_within`. Otherwise a list with
#'   `sheet`, `test_type` and `anova_result` (the `summary()` of the `aov` fit).
perform_anova <- function(sheet_name, data,
                          welch_sheets = c("Asp", "Gln", "His", "Leu")) {
  # Keep only the replicate rows; the summary rows are not observations
  replicate_rows <- data[!data$Con %in% c("Average", "STDEV"), , drop = FALSE]
  measure_cols <- names(replicate_rows)[names(replicate_rows) != "Con"]

  # Columns imported as text are converted; unparsable cells become NA
  to_numeric <- function(x) {
    if (is.numeric(x)) x else as.numeric(as.character(x))
  }

  # Long format: one row per (replicate, concentration). Factor levels follow
  # the column order of the sheet.
  long_data <- data.frame(
    Con = rep(replicate_rows$Con, times = length(measure_cols)),
    Concentration = factor(
      rep(measure_cols, each = nrow(replicate_rows)),
      levels = unique(measure_cols)
    ),
    Value = unlist(
      lapply(replicate_rows[measure_cols], to_numeric),
      use.names = FALSE
    )
  )

  # Remove missing values AFTER the numeric conversion so that NAs created by
  # the coercion cannot leak into the means and sums of squares below
  long_data <- long_data[!is.na(long_data$Value), , drop = FALSE]
  long_data$Concentration <- droplevels(long_data$Concentration)

  if (sheet_name %in% welch_sheets) {
    # Welch's ANOVA (does not assume equal variances)
    welch_anova <- oneway.test(
      Value ~ Concentration,
      data = long_data,
      var.equal = FALSE
    )

    # Descriptive sums of squares, computed by hand because oneway.test()
    # does not report them
    group_means <- tapply(long_data$Value, long_data$Concentration, mean)
    group_sizes <- tapply(long_data$Value, long_data$Concentration, length)
    overall_mean <- mean(long_data$Value)

    # Sum of squares between groups
    SSB <- sum(group_sizes * (group_means - overall_mean)^2)
    # Sum of squares within groups; ave() pairs each value with its own group
    # mean, so the result does not depend on the row order
    SSW <- sum((long_data$Value - ave(long_data$Value, long_data$Concentration))^2)

    # Degrees of freedom. oneway.test() returns c("num df", "denom df"); the
    # Welch-adjusted within-group df is the DENOMINATOR df (the previous code
    # took element 1, i.e. the numerator df).
    df_between <- nlevels(long_data$Concentration) - 1
    df_within <- welch_anova$parameter[["denom df"]]

    MSB <- SSB / df_between
    MSW <- SSW / df_within

    result <- list(
      sheet = sheet_name,
      test_type = "Welch's ANOVA",
      F_statistic = welch_anova$statistic,
      p_value = welch_anova$p.value,
      Sum_Sq_Between = SSB,
      Mean_Sq_Between = MSB,
      Sum_Sq_Within = SSW,
      Mean_Sq_Within = MSW,
      df_between = df_between,
      df_within = df_within
    )
  } else {
    # Classical one-way ANOVA
    anova_result <- aov(Value ~ Concentration, data = long_data)
    result <- list(
      sheet = sheet_name,
      test_type = "Regular ANOVA",
      anova_result = summary(anova_result)
    )
  }

  result
}
# Run the ANOVA for every worksheet and print the result list.
# perform_anova() receives the sheet name as well as the data because the
# name decides between Welch's and the regular ANOVA.
for (sheet in sheet_names) {
data <- get(sheet) # Fetch the data frame stored under the sheet's name (e.g., Asp, Gln, His, etc.)
test_result <- perform_anova(sheet, data)
# Print the result
print(paste("Results for", sheet))
print(test_result)
}
## [1] "Results for AIa"
## $sheet
## [1] "AIa"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 30692 5115 851.5 <2e-16 ***
## Residuals 28 168 6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Arg"
## $sheet
## [1] "Arg"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 18102 3016.9 189.7 <2e-16 ***
## Residuals 28 445 15.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Asp"
## $sheet
## [1] "Asp"
##
## $test_type
## [1] "Welch's ANOVA"
##
## $F_statistic
## F
## 809.7435
##
## $p_value
## [1] 4.361223e-15
##
## $Sum_Sq_Between
## [1] 102857.6
##
## $Mean_Sq_Between
## [1] 17142.94
##
## $Sum_Sq_Within
## [1] 291.8451
##
## $Mean_Sq_Within
## num df
## 48.64085
##
## $df_between
## [1] 6
##
## $df_within
## num df
## 6
##
## [1] "Results for Aspg"
## $sheet
## [1] "Aspg"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 74035 12339 1282 <2e-16 ***
## Residuals 28 269 10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Cys"
## $sheet
## [1] "Cys"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 203266 33878 2087 <2e-16 ***
## Residuals 28 455 16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Gln"
## $sheet
## [1] "Gln"
##
## $test_type
## [1] "Welch's ANOVA"
##
## $F_statistic
## F
## 1244.193
##
## $p_value
## [1] 3.198327e-16
##
## $Sum_Sq_Between
## [1] 194427.3
##
## $Mean_Sq_Between
## [1] 32404.55
##
## $Sum_Sq_Within
## [1] 322.0786
##
## $Mean_Sq_Within
## num df
## 53.67977
##
## $df_between
## [1] 6
##
## $df_within
## num df
## 6
##
## [1] "Results for Glu"
## $sheet
## [1] "Glu"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 239702 39950 2843 <2e-16 ***
## Residuals 28 393 14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Gly"
## $sheet
## [1] "Gly"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 23033 3839 1022 <2e-16 ***
## Residuals 27 101 4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
##
## [1] "Results for His"
## $sheet
## [1] "His"
##
## $test_type
## [1] "Welch's ANOVA"
##
## $F_statistic
## F
## 1455.179
##
## $p_value
## [1] 8.626972e-17
##
## $Sum_Sq_Between
## [1] 138984.6
##
## $Mean_Sq_Between
## [1] 23164.1
##
## $Sum_Sq_Within
## [1] 404.4086
##
## $Mean_Sq_Within
## num df
## 67.40143
##
## $df_between
## [1] 6
##
## $df_within
## num df
## 6
##
## [1] "Results for Iso-Leu"
## $sheet
## [1] "Iso-Leu"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 60077 10013 2425 <2e-16 ***
## Residuals 28 116 4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Leu"
## $sheet
## [1] "Leu"
##
## $test_type
## [1] "Welch's ANOVA"
##
## $F_statistic
## F
## 956.2051
##
## $p_value
## [1] 1.946673e-15
##
## $Sum_Sq_Between
## [1] 98358.64
##
## $Mean_Sq_Between
## [1] 16393.11
##
## $Sum_Sq_Within
## [1] 216.175
##
## $Mean_Sq_Within
## num df
## 36.02916
##
## $df_between
## [1] 6
##
## $df_within
## num df
## 6
##
## [1] "Results for Lys"
## $sheet
## [1] "Lys"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 88281 14713 852.5 <2e-16 ***
## Residuals 28 483 17
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Met"
## $sheet
## [1] "Met"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 56195 9366 242.7 <2e-16 ***
## Residuals 28 1080 39
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Phe"
## $sheet
## [1] "Phe"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 148641 24774 1383 <2e-16 ***
## Residuals 28 502 18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Pro"
## $sheet
## [1] "Pro"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 31849 5308 924.4 <2e-16 ***
## Residuals 28 161 6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Ser"
## $sheet
## [1] "Ser"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 10728 1788.0 267.4 <2e-16 ***
## Residuals 28 187 6.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Thr"
## $sheet
## [1] "Thr"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 77954 12992 1488 <2e-16 ***
## Residuals 28 244 9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Trp"
## $sheet
## [1] "Trp"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 31820 5303 256.3 <2e-16 ***
## Residuals 28 579 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Tyr"
## $sheet
## [1] "Tyr"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 146631 24439 1861 <2e-16 ***
## Residuals 28 368 13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [1] "Results for Val"
## $sheet
## [1] "Val"
##
## $test_type
## [1] "Regular ANOVA"
##
## $anova_result
## Df Sum Sq Mean Sq F value Pr(>F)
## Concentration 6 39166 6528 936.5 <2e-16 ***
## Residuals 28 195 7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 1.3 Table 3 ----
###________________________
#regression
#_________________________
#' Linear regression of the per-concentration means, with RMSE
#'
#' Uses the row labelled "Average" as the response and the (numeric) column
#' names as the predictor, fits `Mean ~ Concentration` and reports the model
#' summary together with the root mean squared error of the fit.
#'
#' @param data Data frame with a `Con` label column and one column per
#'   concentration level whose name is the concentration value. Must contain
#'   exactly one row with `Con == "Average"`.
#' @return A list with `regression_summary` (the `summary()` of the `lm` fit)
#'   and `RMSE` (square root of the mean squared residual).
perform_regression_on_means_with_rmse <- function(data) {
  # Row holding the means; which() drops NA labels like dplyr::filter() does
  average_row <- data[which(data$Con == "Average"), , drop = FALSE]
  if (nrow(average_row) != 1L) {
    stop(
      "Expected exactly one row with Con == \"Average\", found ",
      nrow(average_row),
      call. = FALSE
    )
  }

  # Measurement columns are selected by name rather than by position, so the
  # function no longer depends on `Con` being the first column
  is_measure <- names(data) != "Con"

  # X: concentration values encoded in the column names
  concentration_values <- as.numeric(names(data)[is_measure])
  # Y: the means; each cell is converted on its own so numeric cells keep
  # full precision even when another column was imported as text
  y_values <- vapply(
    average_row[is_measure],
    function(x) if (is.numeric(x)) as.numeric(x) else as.numeric(as.character(x)),
    numeric(1),
    USE.NAMES = FALSE
  )

  regression_data <- data.frame(
    Concentration = concentration_values,
    Mean = y_values
  )

  # Linear regression: Mean (Y) ~ Concentration (X)
  lm_model <- lm(Mean ~ Concentration, data = regression_data)

  # RMSE from the model's own residuals; these always match the rows that lm()
  # actually used, even when rows with NA were dropped
  rmse <- sqrt(mean(residuals(lm_model)^2))

  list(regression_summary = summary(lm_model), RMSE = rmse)
}
# Fit the regression of the means for every worksheet and print the model
# summary followed by the RMSE.
for (sheet in sheet_names) {
data <- get(sheet) # Fetch the data frame stored under the sheet's name (e.g., AIa, Asp, etc.)
# Perform regression analysis
regression_result <- perform_regression_on_means_with_rmse(data)
# Print the regression summary and RMSE for the current sheet
print(paste("Regression results for sheet:", sheet))
print(regression_result$regression_summary)
print(paste("RMSE for sheet", sheet, ":", regression_result$RMSE))
}
## [1] "Regression results for sheet: AIa"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -7.254 13.466 4.930 1.697 -2.839 -4.673 -5.327
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.945123 3.647372 10.952 0.000110 ***
## Concentration 0.081966 0.008588 9.544 0.000214 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.992 on 5 degrees of freedom
## Multiple R-squared: 0.948, Adjusted R-squared: 0.9376
## F-statistic: 91.09 on 1 and 5 DF, p-value: 0.0002137
##
## [1] "RMSE for sheet AIa : 6.75485748018442"
## [1] "Regression results for sheet: Arg"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -7.4488 13.9944 4.6878 0.8687 -3.5373 -4.1788 -4.3860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90.40034 3.66943 24.636 2.05e-06 ***
## Concentration 0.06170 0.00864 7.141 0.000836 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.041 on 5 degrees of freedom
## Multiple R-squared: 0.9107, Adjusted R-squared: 0.8928
## F-statistic: 51 on 1 and 5 DF, p-value: 0.0008362
##
## [1] "RMSE for sheet Arg : 6.79570909396087"
## [1] "Regression results for sheet: Asp"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -19.185 38.164 4.586 -5.174 -5.982 -6.079 -6.330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.27703 9.09311 2.560 0.05066 .
## Concentration 0.14649 0.02141 6.842 0.00102 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.93 on 5 degrees of freedom
## Multiple R-squared: 0.9035, Adjusted R-squared: 0.8842
## F-statistic: 46.81 on 1 and 5 DF, p-value: 0.001018
##
## [1] "RMSE for sheet Asp : 16.840244750102"
## [1] "Regression results for sheet: Aspg"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -14.81 21.80 29.39 25.21 -15.35 -21.59 -24.65
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.20319 12.08001 4.570 0.0060 **
## Concentration 0.11424 0.02844 4.016 0.0102 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.47 on 5 degrees of freedom
## Multiple R-squared: 0.7634, Adjusted R-squared: 0.7161
## F-statistic: 16.13 on 1 and 5 DF, p-value: 0.01016
##
## [1] "RMSE for sheet Aspg : 22.3719321190439"
## [1] "Regression results for sheet: Cys"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -24.152 45.120 14.894 6.095 -11.422 -14.384 -16.151
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.60206 12.03304 3.457 0.018094 *
## Concentration 0.20718 0.02833 7.312 0.000749 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.37 on 5 degrees of freedom
## Multiple R-squared: 0.9145, Adjusted R-squared: 0.8974
## F-statistic: 53.47 on 1 and 5 DF, p-value: 0.0007494
##
## [1] "RMSE for sheet Cys : 22.2849355741498"
## [1] "Regression results for sheet: Gln"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -19.275 37.571 7.670 -3.200 -7.140 -7.782 -7.845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.81075 9.18449 1.721 0.145787
## Concentration 0.20630 0.02163 9.539 0.000214 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.13 on 5 degrees of freedom
## Multiple R-squared: 0.9479, Adjusted R-squared: 0.9375
## F-statistic: 91 on 1 and 5 DF, p-value: 0.0002142
##
## [1] "RMSE for sheet Gln : 17.0094880026335"
## [1] "Regression results for sheet: Glu"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -26.018 51.556 5.174 -2.679 -8.535 -9.618 -9.880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.59558 12.29935 1.431 0.211945
## Concentration 0.22618 0.02896 7.810 0.000552 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.95 on 5 degrees of freedom
## Multiple R-squared: 0.9242, Adjusted R-squared: 0.9091
## F-statistic: 61 on 1 and 5 DF, p-value: 0.0005516
##
## [1] "RMSE for sheet Glu : 22.7781406860965"
## [1] "Regression results for sheet: Gly"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -9.219 18.355 1.882 -1.986 -2.895 -3.087 -3.050
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.09999 4.36086 5.526 0.00266 **
## Concentration 0.06988 0.01027 6.806 0.00104 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.556 on 5 degrees of freedom
## Multiple R-squared: 0.9026, Adjusted R-squared: 0.8831
## F-statistic: 46.32 on 1 and 5 DF, p-value: 0.001043
##
## [1] "RMSE for sheet Gly : 8.07622178238542"
## [1] "Regression results for sheet: His"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -20.920 41.148 7.466 -5.706 -7.100 -7.412 -7.476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.64517 9.95724 6.794 0.001052 **
## Concentration 0.17130 0.02345 7.307 0.000752 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.82 on 5 degrees of freedom
## Multiple R-squared: 0.9144, Adjusted R-squared: 0.8972
## F-statistic: 53.39 on 1 and 5 DF, p-value: 0.0007521
##
## [1] "RMSE for sheet His : 18.4406070271489"
## [1] "Regression results for sheet: Iso-Leu"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -13.074 24.666 7.398 2.071 -5.896 -7.053 -8.112
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.18118 6.41718 2.054 0.09515 .
## Concentration 0.11283 0.01511 7.468 0.00068 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.06 on 5 degrees of freedom
## Multiple R-squared: 0.9177, Adjusted R-squared: 0.9013
## F-statistic: 55.76 on 1 and 5 DF, p-value: 0.0006798
##
## [1] "RMSE for sheet Iso-Leu : 11.8844799218731"
## [1] "Regression results for sheet: Leu"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -17.318 34.535 2.314 -1.803 -5.508 -5.750 -6.469
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.42208 8.17985 2.374 0.063606 .
## Concentration 0.14442 0.01926 7.499 0.000667 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.92 on 5 degrees of freedom
## Multiple R-squared: 0.9183, Adjusted R-squared: 0.902
## F-statistic: 56.23 on 1 and 5 DF, p-value: 0.0006669
##
## [1] "RMSE for sheet Leu : 15.1489222842836"
## [1] "Regression results for sheet: Lys"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -19.148 35.897 12.357 2.439 -9.350 -10.575 -11.620
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.77806 9.45907 6.848 0.00101 **
## Concentration 0.13381 0.02227 6.008 0.00184 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.73 on 5 degrees of freedom
## Multiple R-squared: 0.8783, Adjusted R-squared: 0.854
## F-statistic: 36.1 on 1 and 5 DF, p-value: 0.001835
##
## [1] "RMSE for sheet Lys : 17.5180036336255"
## [1] "Regression results for sheet: Met"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -8.429 16.117 3.011 2.802 -4.325 -4.622 -4.553
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.480e+02 4.125e+00 35.89 3.16e-07 ***
## Concentration 1.118e-01 9.713e-03 11.51 8.67e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.039 on 5 degrees of freedom
## Multiple R-squared: 0.9637, Adjusted R-squared: 0.9564
## F-statistic: 132.6 on 1 and 5 DF, p-value: 8.666e-05
##
## [1] "RMSE for sheet Met : 7.63936633688684"
## [1] "Regression results for sheet: Phe"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -19.172 34.100 20.020 6.385 -11.408 -13.894 -16.032
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.37437 10.31016 3.722 0.013684 *
## Concentration 0.17714 0.02428 7.297 0.000757 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.59 on 5 degrees of freedom
## Multiple R-squared: 0.9142, Adjusted R-squared: 0.897
## F-statistic: 53.24 on 1 and 5 DF, p-value: 0.0007569
##
## [1] "RMSE for sheet Phe : 19.0942111872395"
## [1] "Regression results for sheet: Pro"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -10.743 17.058 18.999 9.683 -9.394 -12.471 -13.132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.55712 7.29396 4.189 0.00858 **
## Concentration 0.07668 0.01717 4.465 0.00661 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.98 on 5 degrees of freedom
## Multiple R-squared: 0.7995, Adjusted R-squared: 0.7594
## F-statistic: 19.93 on 1 and 5 DF, p-value: 0.006611
##
## [1] "RMSE for sheet Pro : 13.5082733134469"
## [1] "Regression results for sheet: Ser"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -6.343 10.750 8.333 4.370 -4.780 -5.893 -6.437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.359338 3.781670 12.523 5.76e-05 ***
## Concentration 0.045617 0.008904 5.123 0.0037 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.287 on 5 degrees of freedom
## Multiple R-squared: 0.84, Adjusted R-squared: 0.808
## F-statistic: 26.25 on 1 and 5 DF, p-value: 0.003698
##
## [1] "RMSE for sheet Ser : 7.00357552494693"
## [1] "Regression results for sheet: Thr"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -11.708 19.097 20.341 5.779 -9.578 -10.921 -13.009
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.80307 7.44048 4.274 0.007906 **
## Concentration 0.12832 0.01752 7.325 0.000744 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.3 on 5 degrees of freedom
## Multiple R-squared: 0.9147, Adjusted R-squared: 0.8977
## F-statistic: 53.65 on 1 and 5 DF, p-value: 0.0007436
##
## [1] "RMSE for sheet Thr : 13.7796237009493"
## [1] "Regression results for sheet: Trp"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -10.7926 21.6908 -0.3616 0.7963 -3.2713 -3.9285 -4.1331
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109.77974 5.12642 21.41 4.12e-06 ***
## Concentration 0.08136 0.01207 6.74 0.00109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.23 on 5 degrees of freedom
## Multiple R-squared: 0.9009, Adjusted R-squared: 0.881
## F-statistic: 45.43 on 1 and 5 DF, p-value: 0.00109
##
## [1] "RMSE for sheet Trp : 9.4940183703731"
## [1] "Regression results for sheet: Tyr"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -13.963 24.903 15.054 2.973 -8.422 -9.533 -11.012
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.45233 7.45317 5.562 0.002585 **
## Concentration 0.17978 0.01755 10.244 0.000152 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.33 on 5 degrees of freedom
## Multiple R-squared: 0.9545, Adjusted R-squared: 0.9454
## F-statistic: 104.9 on 1 and 5 DF, p-value: 0.0001522
##
## [1] "RMSE for sheet Tyr : 13.8031186392565"
## [1] "Regression results for sheet: Val"
##
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
##
## Residuals:
## 1 2 3 4 5 6 7
## -8.949 11.573 24.205 19.073 -11.117 -16.295 -18.490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.34478 8.88338 3.528 0.0168 *
## Concentration 0.08280 0.02092 3.959 0.0108 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.47 on 5 degrees of freedom
## Multiple R-squared: 0.7581, Adjusted R-squared: 0.7098
## F-statistic: 15.67 on 1 and 5 DF, p-value: 0.01076
##
## [1] "RMSE for sheet Val : 16.4518448713978"
# 1.4 Figure 1 ----
# NOTE: the former `rm(list = ls())` call was removed on purpose. It deleted
# every object in the caller's workspace without actually resetting the
# session (attached packages and options survive). Everything this section
# needs is loaded again below, so a fresh R session is the right way to get a
# clean start.

# Load necessary libraries
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(openxlsx)
library(extrafont)
library(reshape2)
library(car)
library(cowplot)
library(gridExtra)

# Register fonts with the Windows graphics device. The "win" device is only
# available on Windows, so the call is skipped on other platforms instead of
# stopping the whole script.
if (.Platform$OS.type == "windows") {
  loadfonts(device = "win")
}

#_________________
# Read data
#_______________________
# Names of all worksheets in the workbook
sheet_names <- excel_sheets("Data.xlsx")
# Read each worksheet of the workbook at `file_path` and store it in the
# global environment as a data frame named after the worksheet. Rows whose
# `Con` label is missing are labelled "rep1", "rep2", ... in order.
read_and_assign_sheets <- function(file_path) {
  for (tab_name in excel_sheets(file_path)) {
    tab_data <- read.xlsx(file_path, sheet = tab_name, colNames = TRUE)

    # Number the unlabeled (replicate) rows
    missing_label <- is.na(tab_data$Con)
    tab_data$Con[missing_label] <- paste0("rep", seq_len(sum(missing_label)))

    assign(tab_name, tab_data, envir = .GlobalEnv)
  }
}

# Load all sheets of the project workbook
file_path <- "Data.xlsx"
read_and_assign_sheets(file_path)
#__________________________________________
# Format a fitted straight line as "Y = <slope>X + <intercept>".
# A negative intercept is rendered as "X - 3.2" instead of "X + -3.2".
.format_line_equation <- function(slope, intercept, digits = 2) {
  slope_val <- round(unname(slope), digits)
  intercept_val <- round(unname(intercept), digits)
  is_negative <- !is.na(intercept_val) && intercept_val < 0
  paste0(
    "Y = ", slope_val, "X",
    if (is_negative) " - " else " + ",
    abs(intercept_val)
  )
}

# Render a data frame as a bold, bordered table grob.
# show_header = FALSE suppresses the column-name row.
.stats_table_grob <- function(table_df, show_header) {
  tableGrob(
    table_df,
    rows = NULL,
    cols = if (show_header) colnames(table_df) else NULL,
    theme = ttheme_minimal(
      core = list(
        fg_params = list(fontface = "bold", fontsize = 20, col = "black"),
        bg_params = list(fill = "white", col = "black", lwd = 1)
      ),
      colhead = list(
        fg_params = list(col = "black", fontface = "bold", fontsize = 20)
      )
    )
  )
}

# Theme shared by both regression panels; only the legend anchor
# (`legend_xy`, relative panel coordinates) differs between them.
.regression_panel_theme <- function(legend_xy) {
  bold_serif <- function(size, ...) {
    element_text(size = size, family = "Times New Roman", face = "bold", ...)
  }
  theme(
    plot.title = bold_serif(30),
    axis.text = bold_serif(30),
    axis.title.x = bold_serif(30),
    axis.title.y = bold_serif(30, margin = margin(r = 60)),
    legend.text = bold_serif(25),
    legend.title = element_blank(),
    legend.position = legend_xy,
    legend.direction = "vertical",
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA)
  )
}

# Build the two-panel regression figure for one sheet.
#
# Left panel: linear fit through rows 4:7 of the per-concentration means,
# annotated with the fitted equation, R-squared and RMSE.
# Right panel: linear and loess fits through all means, annotated with
# R-squared and RMSE of both models.
# Both panels also show the individual replicate measurements.
#
# data:       sheet data frame; first column `Con` holds the row labels
#             ("Average", "STDEV", replicate labels) and the remaining
#             column names are the concentrations.
# sheet_name: sheet label, used in the panel titles and to pick the
#             annotation/legend placement.
# Returns the combined cowplot object. Stops with an explicit message when
# the sheet does not contain exactly one "Average" row.
plot_regression_separate <- function(data, sheet_name) {
  # Mean response per concentration, taken from the "Average" row.
  average_row <- data %>% filter(Con == "Average")
  if (nrow(average_row) != 1) {
    stop(
      "Sheet '", sheet_name,
      "' must contain exactly one 'Average' row, found ",
      nrow(average_row), ".",
      call. = FALSE
    )
  }
  regression_data <- data.frame(
    Concentration = as.numeric(names(data)[-1]),
    Mean = as.numeric(unlist(average_row[-1]))
  )

  # Replicate measurements in long format (one row per replicate and
  # concentration); computed once and shared by both panels.
  repetition_data <- data %>%
    filter(!Con %in% c("Average", "STDEV")) %>%
    gather(key = "Concentration", value = "Value", -Con) %>%
    mutate(
      Concentration = as.numeric(Concentration),
      Value = as.numeric(Value)
    )

  # ---- Left panel: low-concentration linear fit ----
  # NOTE(review): rows 4:7 are selected by position. The panel is titled
  # "First 4 Points" and the replicates are filtered to 1..50, so this
  # presumably relies on the concentration columns being ordered from high
  # to low in the workbook -- confirm against Data.xlsx.
  first_part <- regression_data[4:7, ]
  lm_model_first <- lm(Mean ~ Concentration, data = first_part)
  residuals_first <- first_part$Mean - predict(lm_model_first)
  rmse_first <- sqrt(mean(residuals_first^2))
  coefs_first <- coef(lm_model_first)

  table_data <- data.frame(
    Values = c(
      .format_line_equation(coefs_first[2], coefs_first[1]),
      paste0("R² = ", round(summary(lm_model_first)$r.squared, 3)),
      paste0("RMSE = ", round(rmse_first, 3))
    )
  )
  table1 <- .stats_table_grob(table_data, show_header = FALSE)

  first_points <- repetition_data %>%
    filter(Concentration >= 1 & Concentration <= 50)

  # Sheet-specific placement of the statistics table (data units) and of the
  # legend (relative panel coordinates).
  if (sheet_name %in% c("Met", "Trp", "Aspg", "Arg", "Lys", "His")) {
    table_box <- list(xmin = 30, xmax = 40, ymin = 20, ymax = 40)
  } else {
    table_box <- list(xmin = 10, xmax = 20, ymin = 80, ymax = 90)
  }
  legend_xy_first <- if (sheet_name == "Met") c(0.1, 0.7) else c(0.1, 0.85)

  # NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` for lines;
  # `size` is kept so the script still runs on older ggplot2 versions.
  p1 <- ggplot(first_part, aes(x = Concentration, y = Mean)) +
    geom_point(aes(color = "Average Points"), size = 10) +
    geom_point(
      data = first_points,
      aes(x = Concentration, y = Value, color = "Repetition Points"),
      alpha = 0.6, size = 5
    ) +
    geom_smooth(method = "lm", se = FALSE, color = "navyblue", size = 3.5) +
    labs(title = paste(sheet_name, "- First 4 Points")) +
    scale_color_manual(
      values = c("Average Points" = "navyblue", "Repetition Points" = "#B8860B")
    ) +
    scale_x_continuous(breaks = seq(0, 50, 10), limits = c(0, 50)) +
    scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, 30)) +
    theme_minimal() +
    .regression_panel_theme(legend_xy_first) +
    annotation_custom(
      grob = table1,
      xmin = table_box$xmin,
      xmax = table_box$xmax,
      ymin = table_box$ymin,
      ymax = table_box$ymax
    )

  # ---- Right panel: linear and loess fits over all concentrations ----
  lm_model <- lm(Mean ~ Concentration, data = regression_data)
  lm_residuals <- regression_data$Mean - predict(lm_model)
  lm_rmse <- sqrt(mean(lm_residuals^2))
  lm_r_squared <- summary(lm_model)$r.squared

  loess_model <- loess(Mean ~ Concentration, data = regression_data)
  loess_residuals <- regression_data$Mean - predict(loess_model)
  loess_rmse <- sqrt(mean(loess_residuals^2))
  # Pseudo R-squared for loess: 1 - SSE / SST.
  sst <- sum((regression_data$Mean - mean(regression_data$Mean))^2)
  loess_r_squared <- 1 - sum(loess_residuals^2) / sst

  table_data2 <- data.frame(
    LinearModel = c(
      paste0("R² = ", round(lm_r_squared, 3)),
      paste0("RMSE = ", round(lm_rmse, 3))
    ),
    LoessModel = c(
      paste0("R² = ", round(loess_r_squared, 3)),
      paste0("RMSE = ", round(loess_rmse, 3))
    )
  )
  table2 <- .stats_table_grob(table_data2, show_header = TRUE)

  p2 <- ggplot(regression_data, aes(x = Concentration, y = Mean)) +
    geom_point(aes(color = "Average Points"), size = 10) +
    geom_point(
      data = repetition_data,
      aes(x = Concentration, y = Value, color = "Repetition Points"),
      alpha = 0.6, size = 5
    ) +
    geom_smooth(method = "lm", se = FALSE, color = "navyblue", size = 2) +
    geom_smooth(
      method = "loess", se = FALSE, color = "navyblue",
      linetype = "dashed", size = 3.5
    ) +
    labs(title = paste(sheet_name, "- All Points")) +
    theme_minimal() +
    scale_x_continuous(breaks = seq(0, 1000, 200), limits = c(0, 1000)) +
    scale_y_continuous(limits = c(0, 260), breaks = seq(0, 260, 50)) +
    scale_color_manual(
      values = c("Average Points" = "navyblue", "Repetition Points" = "#B8860B")
    ) +
    .regression_panel_theme(c(0.08, 0.85)) +
    # Table box in data units: x 750..850 of 0..1000 and y 35..45 of 0..260,
    # i.e. towards the lower right of the panel.
    annotation_custom(
      grob = table2,
      xmin = 750,
      xmax = 850,
      ymin = 35,
      ymax = 45
    )

  # Place both panels side by side (left panel half as wide) and frame them.
  plot_grid(
    p1, p2,
    ncol = 2, rel_widths = c(0.5, 1), align = "v", axis = "lr"
  ) +
    theme(
      plot.margin = margin(10, 10, 10, 10),
      plot.background = element_rect(color = "black", size = 2)
    )
}
# Build the two-panel regression figure for every sheet, keyed by sheet name.
plots <- list()
for (sheet in sheet_names) {
  plots[[sheet]] <- plot_regression_separate(get(sheet), sheet)
}
# Arrange the figures two per row. The row count (and the image height, at
# 50 in per 11 rows) is derived from the number of sheets instead of being
# hard-coded, so the grid always has a cell for every sheet.
grid_cols <- 2
grid_rows <- ceiling(length(plots) / grid_cols)
combined_plot <- plot_grid(plotlist = plots, ncol = grid_cols, nrow = grid_rows)
ggsave(
  "combined_plot.png",
  plot = combined_plot,
  width = 40,
  height = 50 * grid_rows / 11,
  units = "in",
  limitsize = FALSE
)
knitr::include_graphics("combined_plot.png")

1.5 Figure 2
#___________________
#barplot
#____________________
# Collect the "Average" and "STDEV" rows of every sheet into one long table.
#
# sheet_names: names of the per-sheet data frames to look up with get().
# Returns a data frame with one row per sheet and concentration and the
# columns Concentration, Mean, STDEV and Variable (the sheet name).
combine_data_for_individual_barplots <- function(sheet_names) {
  # Long-format mean/stdev table for a single sheet.
  summarise_sheet <- function(sheet) {
    sheet_df <- get(sheet)

    average_long <- gather(
      filter(sheet_df, Con == "Average"),
      key = "Concentration", value = "Mean", -Con
    )
    average_long <- mutate(
      average_long,
      Concentration = as.numeric(Concentration),
      Mean = as.numeric(Mean)
    )

    stdev_long <- gather(
      filter(sheet_df, Con == "STDEV"),
      key = "Concentration", value = "STDEV", -Con
    )
    stdev_long <- mutate(
      stdev_long,
      Concentration = as.numeric(Concentration),
      STDEV = as.numeric(STDEV)
    )

    # Pair each mean with the standard deviation of the same concentration.
    paired <- left_join(
      select(average_long, -Con),
      select(stdev_long, -Con),
      by = "Concentration"
    )
    mutate(paired, Variable = sheet)
  }

  # Stack the per-sheet tables in sheet order, starting from an empty frame.
  Reduce(rbind, lapply(sheet_names, summarise_sheet), data.frame())
}
# Bar chart of mean response per concentration with +/- STDEV error bars.
#
# data:          data frame with columns Concentration, Mean and STDEV.
# variable_name: label used as the plot title.
# analyte:       name shown inside the brackets of the x-axis label,
#                "[<analyte>] (uM)". Defaults to `variable_name` so that each
#                panel is labelled with its own variable rather than a fixed
#                "[Asp]"; pass analyte = "Asp" to get the fixed label.
# Returns the ggplot object.
create_individual_barplot <- function(data, variable_name,
                                      analyte = variable_name) {
  x_axis_label <- as.expression(
    bquote(bold(paste("[", .(analyte), "] (", mu, "M)")))
  )
  bold_serif <- function(size, ...) {
    element_text(size = size, family = "Times New Roman", face = "bold", ...)
  }

  ggplot(data, aes(x = factor(Concentration), y = Mean)) +
    geom_bar(stat = "identity", fill = "#B8860B", width = 0.7) +
    geom_errorbar(
      aes(ymin = Mean - STDEV, ymax = Mean + STDEV),
      width = 0.4, color = "navyblue"
    ) +
    labs(
      title = variable_name,
      x = x_axis_label,
      y = expression(bold(Delta~"admittance ("*mu*"A/V)"))
    ) +
    theme_minimal() +
    # Fixed y-axis so that all panels share the same scale.
    scale_y_continuous(limits = c(0, 260), breaks = seq(0, 260, 50)) +
    theme(
      plot.title = bold_serif(20),
      axis.text = bold_serif(20),
      axis.title.x = bold_serif(20),
      axis.title.y = bold_serif(20, margin = margin(r = 30)),
      legend.text = bold_serif(15),
      legend.title = element_blank(),
      legend.position = NULL,
      legend.direction = "vertical",
      panel.background = element_blank(),
      plot.background = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA)
    )
}
# Build one bar chart per variable and arrange them in a four-column grid.
#
# sheet_names: names of the per-sheet data frames to summarise.
# Returns the cowplot grid holding one panel per variable, in order of first
# appearance in the combined data.
create_combined_barplots <- function(sheet_names) {
  all_summaries <- combine_data_for_individual_barplots(sheet_names)
  variable_names <- unique(all_summaries$Variable)

  panels <- lapply(variable_names, function(variable) {
    create_individual_barplot(
      all_summaries %>% filter(Variable == variable),
      variable
    )
  })
  names(panels) <- variable_names

  cowplot::plot_grid(plotlist = panels, ncol = 4)
}
# Render the grid of per-variable bar charts, write it to disk and embed the
# saved image in the knitted output.
combined_barplots <- create_combined_barplots(sheet_names = sheet_names)
ggsave(
  filename = "combined_individual_barplots.png",
  plot = combined_barplots,
  width = 20,
  height = 17
)
knitr::include_graphics(path = "combined_individual_barplots.png")

1.6 Figure3
###_____________________
#boxplot
#________________________
# Stack the "Average" row of every sheet into one long data frame.
#
# sheet_names: names of the per-sheet data frames to look up with get().
# Returns a data frame with the columns Con, Concentration (numeric), Value
# and Variable (the sheet name), one row per sheet and concentration.
combine_all_data <- function(sheet_names) {
  # Long-format "Average" row of a single sheet, tagged with the sheet name.
  average_row_long <- function(sheet) {
    long_df <- gather(
      filter(get(sheet), Con == "Average"),
      key = "Concentration", value = "Value", -Con
    )
    long_df <- mutate(long_df, Concentration = as.numeric(Concentration))
    long_df$Variable <- sheet
    long_df
  }

  # Stack the per-sheet tables in sheet order, starting from an empty frame.
  Reduce(rbind, lapply(sheet_names, average_row_long), data.frame())
}
# Boxplot of the values in `combined_data`, one box per Variable, with every
# row overlaid as a jittered point coloured by its Concentration.
#
# combined_data: data frame with columns Variable, Value and Concentration
#                (Value is coerced to numeric).
# Returns the ggplot object. The y-axis is fixed to 0..260.
create_combined_boxplot <- function(combined_data) {
  combined_data$Value <- as.numeric(combined_data$Value)

  bold_serif <- function(size) {
    element_text(size = size, family = "Times New Roman", face = "bold")
  }

  ggplot(combined_data, aes(x = Variable, y = Value)) +
    # Outliers are hidden in the box layer because every observation is
    # already drawn by the jitter layer.
    geom_boxplot(outlier.shape = NA, fill = NA) +
    geom_jitter(aes(color = Concentration), width = 0.2, size = 4) +
    scale_color_gradient(
      low = "#B8860B", high = "navyblue", name = "Concentration"
    ) +
    labs(
      x = "Variable",
      y = expression(bold(Delta~"admittance ("*mu*"A/V)"))
    ) +
    theme_minimal() +
    scale_y_continuous(limits = c(0, 260), breaks = seq(0, 260, 50)) +
    theme(
      plot.title = bold_serif(20),
      axis.text = bold_serif(20),
      axis.title.y = bold_serif(20),
      axis.title.x = bold_serif(20),
      legend.text = bold_serif(20),
      legend.title = element_blank(),
      legend.position = c(0.1, 0.8),
      legend.direction = "vertical",
      panel.background = element_blank(),
      plot.background = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA)
    )
}
# Stack the "Average" rows of all sheets into one long data frame.
combined_data <- combine_all_data(sheet_names = sheet_names)
# One box per sheet; the points are the per-concentration averages.
combined_boxplot <- create_combined_boxplot(combined_data = combined_data)
# Write the figure to disk and embed the saved image in the knitted output.
ggsave(
  filename = "combined_boxplot.png",
  plot = combined_boxplot,
  width = 16,
  height = 8
)
knitr::include_graphics(path = "combined_boxplot.png")
